// Vendored and slightly modified from: https://github.com/apache/arrow-rs-object-store/blob/c0e241eb95a61d52964f3d7741673b91f86db58b/src/parse.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

use object_store::ClientOptions;
use object_store::ObjectStore;
use object_store::local::LocalFileSystem;
use object_store::memory::InMemory;
use object_store::path::Path;
use url::Url;

#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error("Unable to recognise URL \"{}\"", url)]
    Unrecognised { url: Url },

    #[error(transparent)]
    Path {
        #[from]
        source: object_store::path::Error,
    },
}

impl From<Error> for object_store::Error {
    fn from(e: Error) -> Self {
        Self::Generic {
            store: "URL",
            source: Box::new(e),
        }
    }
}

/// Recognizes various URL formats, identifying the relevant [`ObjectStore`]
///
/// See [`ObjectStoreScheme::parse`] for more details
///
/// # Supported formats:
/// - `file:///path/to/my/file` -> [`LocalFileSystem`]
/// - `memory:///` -> [`InMemory`]
/// - `s3://bucket/path` -> [`AmazonS3`](crate::aws::AmazonS3) (also supports `s3a`)
/// - `gs://bucket/path` -> [`GoogleCloudStorage`](crate::gcp::GoogleCloudStorage)
/// - `az://account/container/path` -> [`MicrosoftAzure`](crate::azure::MicrosoftAzure) (also supports `adl`, `azure`, `abfs`, `abfss`)
/// - `http://mydomain/path` -> [`HttpStore`](crate::http::HttpStore)
/// - `https://mydomain/path` -> [`HttpStore`](crate::http::HttpStore)
///
/// There are also special cases for AWS and Azure for `https://{host?}/path` paths:
/// - `dfs.core.windows.net`, `blob.core.windows.net`, `dfs.fabric.microsoft.com`, `blob.fabric.microsoft.com` -> [`MicrosoftAzure`](crate::azure::MicrosoftAzure)
/// - `amazonaws.com` -> [`AmazonS3`](crate::aws::AmazonS3)
/// - `r2.cloudflarestorage.com` -> [`AmazonS3`](crate::aws::AmazonS3)
///
#[non_exhaustive] // permit new variants
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum ObjectStoreScheme {
    /// Url corresponding to [`LocalFileSystem`]
    Local,
    /// Url corresponding to [`InMemory`]
    Memory,
    /// Url corresponding to [`AmazonS3`](crate::aws::AmazonS3)
    AmazonS3,
    /// Url corresponding to [`GoogleCloudStorage`](crate::gcp::GoogleCloudStorage)
    GoogleCloudStorage,
    /// Url corresponding to [`MicrosoftAzure`](crate::azure::MicrosoftAzure)
    MicrosoftAzure,
    /// Url corresponding to [`HttpStore`](crate::http::HttpStore)
    Http,
}

impl ObjectStoreScheme {
    /// Create an [`ObjectStoreScheme`] from the provided [`Url`]
    ///
    /// Returns the [`ObjectStoreScheme`] and the remaining [`Path`]
    ///
    /// # Example
    /// ```
    /// # use url::Url;
    /// # use object_store::ObjectStoreScheme;
    /// let url: Url = "file:///path/to/my/file".parse().unwrap();
    /// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
    /// assert_eq!(scheme, ObjectStoreScheme::Local);
    /// assert_eq!(path.as_ref(), "path/to/my/file");
    ///
    /// let url: Url = "https://blob.core.windows.net/container/path/to/my/file".parse().unwrap();
    /// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
    /// assert_eq!(scheme, ObjectStoreScheme::MicrosoftAzure);
    /// assert_eq!(path.as_ref(), "path/to/my/file");
    ///
    /// let url: Url = "https://example.com/path/to/my/file".parse().unwrap();
    /// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
    /// assert_eq!(scheme, ObjectStoreScheme::Http);
    /// assert_eq!(path.as_ref(), "path/to/my/file");
    /// ```
    pub fn parse(url: &Url) -> Result<(Self, Path), Error> {
        let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);

        let (scheme, path) = match (url.scheme(), url.host_str()) {
            ("file", None) => (Self::Local, url.path()),
            ("memory", None) => (Self::Memory, url.path()),
            ("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
            ("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
            ("az", Some(_)) => (Self::MicrosoftAzure, strip_bucket().unwrap_or_default()),
            ("adl" | "azure" | "abfs" | "abfss", Some(_)) => (Self::MicrosoftAzure, url.path()),
            ("http", Some(_)) => (Self::Http, url.path()),
            ("https", Some(host)) => {
                if host.ends_with("dfs.core.windows.net")
                    || host.ends_with("blob.core.windows.net")
                    || host.ends_with("dfs.fabric.microsoft.com")
                    || host.ends_with("blob.fabric.microsoft.com")
                {
                    (Self::MicrosoftAzure, strip_bucket().unwrap_or_default())
                } else if host.ends_with("amazonaws.com") {
                    match host.starts_with("s3") {
                        true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
                        false => (Self::AmazonS3, url.path()),
                    }
                } else if host.ends_with("r2.cloudflarestorage.com") {
                    (Self::AmazonS3, strip_bucket().unwrap_or_default())
                } else {
                    (Self::Http, url.path())
                }
            }
            _ => return Err(Error::Unrecognised { url: url.clone() }),
        };

        Ok((scheme, Path::from_url_path(path)?))
    }
}

/// Create an [`ObjectStore`] based on the provided `url` and options
///
/// This method can be used to create an instance of one of the provided
/// `ObjectStore` implementations based on the URL scheme (see
/// [`ObjectStoreScheme`] for more details).
///
/// For example
/// * `file:///path/to/my/file` will return a [`LocalFileSystem`] instance
/// * `s3://bucket/path` will return an [`AmazonS3`] instance if the `aws` feature is enabled.
///
/// Arguments:
/// * `url`: The URL to parse
/// * `options`: A list of key-value pairs to pass to the [`ObjectStore`] builder.
///   Note different object stores accept different configuration options, so
///   the options that are read depends on the `url` value. One common pattern
///   is to pass configuration information via process variables using
///   [`std::env::vars`].
///
/// Returns
/// - An [`ObjectStore`] of the corresponding type
/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
///
/// [`AmazonS3`]: https://docs.rs/object_store/0.12.0/object_store/aws/struct.AmazonS3.html
pub fn parse_url_opts<I, K, V>(
    url: &Url,
    options: I,
) -> Result<(Box<dyn ObjectStore>, Path), object_store::Error>
where
    I: IntoIterator<Item = (K, V)>,
    K: AsRef<str>,
    V: Into<String>,
{
    let (scheme, path) = ObjectStoreScheme::parse(url)?;
    let path = Path::parse(path)?;

    let store: Box<dyn ObjectStore> = match scheme {
        ObjectStoreScheme::Local => Box::new(LocalFileSystem::new()),
        ObjectStoreScheme::Memory => Box::new(InMemory::new()),
        ObjectStoreScheme::AmazonS3 => {
            let builder = options.into_iter().fold(
                object_store::aws::AmazonS3Builder::new()
                    .with_url(url.to_string())
                    .with_client_options(ClientOptions::default().with_timeout_disabled()),
                |builder, (key, value)| match key.as_ref().parse() {
                    Ok(k) => builder.with_config(k, value),
                    Err(_) => builder,
                },
            );
            Box::new(builder.build()?)
        }
        ObjectStoreScheme::GoogleCloudStorage => {
            let builder = options.into_iter().fold(
                object_store::gcp::GoogleCloudStorageBuilder::new()
                    .with_url(url.to_string())
                    .with_client_options(ClientOptions::default().with_timeout_disabled()),
                |builder, (key, value)| match key.as_ref().parse() {
                    Ok(k) => builder.with_config(k, value),
                    Err(_) => builder,
                },
            );
            Box::new(builder.build()?)
        }
        ObjectStoreScheme::MicrosoftAzure => {
            let builder = options.into_iter().fold(
                object_store::azure::MicrosoftAzureBuilder::new()
                    .with_url(url.to_string())
                    .with_client_options(ClientOptions::default().with_timeout_disabled()),
                |builder, (key, value)| match key.as_ref().parse() {
                    Ok(k) => builder.with_config(k, value),
                    Err(_) => builder,
                },
            );
            Box::new(builder.build()?)
        }
        s => {
            return Err(object_store::Error::Generic {
                store: "parse_url",
                source: format!("feature for {s:?} not enabled").into(),
            });
        }
    };

    Ok((store, path))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse() {
        let cases = [
            ("file:/path", (ObjectStoreScheme::Local, "path")),
            ("file:///path", (ObjectStoreScheme::Local, "path")),
            ("memory:/path", (ObjectStoreScheme::Memory, "path")),
            ("memory:///", (ObjectStoreScheme::Memory, "")),
            ("s3://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
            ("s3a://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
            (
                "https://s3.region.amazonaws.com/bucket",
                (ObjectStoreScheme::AmazonS3, ""),
            ),
            (
                "https://s3.region.amazonaws.com/bucket/path",
                (ObjectStoreScheme::AmazonS3, "path"),
            ),
            (
                "https://bucket.s3.region.amazonaws.com",
                (ObjectStoreScheme::AmazonS3, ""),
            ),
            (
                "https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket",
                (ObjectStoreScheme::AmazonS3, ""),
            ),
            (
                "https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket/path",
                (ObjectStoreScheme::AmazonS3, "path"),
            ),
            (
                "abfs://container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfs://file_system@account_name.dfs.core.windows.net/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "abfss://file_system@account_name.dfs.core.windows.net/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "https://account.dfs.core.windows.net",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.core.windows.net/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "https://account.blob.core.windows.net",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.blob.core.windows.net/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "az://account/container",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "az://account/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "gs://bucket/path",
                (ObjectStoreScheme::GoogleCloudStorage, "path"),
            ),
            (
                "gs://test.example.com/path",
                (ObjectStoreScheme::GoogleCloudStorage, "path"),
            ),
            ("http://mydomain/path", (ObjectStoreScheme::Http, "path")),
            ("https://mydomain/path", (ObjectStoreScheme::Http, "path")),
            (
                "s3://bucket/foo%20bar",
                (ObjectStoreScheme::AmazonS3, "foo bar"),
            ),
            (
                "s3://bucket/foo bar",
                (ObjectStoreScheme::AmazonS3, "foo bar"),
            ),
            ("s3://bucket/😀", (ObjectStoreScheme::AmazonS3, "😀")),
            (
                "s3://bucket/%F0%9F%98%80",
                (ObjectStoreScheme::AmazonS3, "😀"),
            ),
            (
                "https://foo/bar%20baz",
                (ObjectStoreScheme::Http, "bar baz"),
            ),
            (
                "file:///bar%252Efoo",
                (ObjectStoreScheme::Local, "bar%2Efoo"),
            ),
            (
                "abfss://file_system@account.dfs.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "abfss://file_system@account.dfs.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.fabric.microsoft.com/container",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.dfs.fabric.microsoft.com/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
            (
                "https://account.blob.fabric.microsoft.com/",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.blob.fabric.microsoft.com/container",
                (ObjectStoreScheme::MicrosoftAzure, ""),
            ),
            (
                "https://account.blob.fabric.microsoft.com/container/path",
                (ObjectStoreScheme::MicrosoftAzure, "path"),
            ),
        ];

        for (s, (expected_scheme, expected_path)) in cases {
            let url = Url::parse(s).unwrap();
            let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();

            assert_eq!(scheme, expected_scheme, "{s}");
            assert_eq!(path, Path::parse(expected_path).unwrap(), "{s}");
        }

        let neg_cases = [
            "unix:/run/foo.socket",
            "file://remote/path",
            "memory://remote/",
        ];
        for s in neg_cases {
            let url = Url::parse(s).unwrap();
            assert!(ObjectStoreScheme::parse(&url).is_err());
        }
    }
}
